{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "E:\\Program Files (x86)\\Anaconda3\\lib\\site-packages\\ipykernel_launcher.py:5: DeprecationWarning: numpy.core.umath_tests is an internal NumPy module and should not be imported. It will be removed in a future NumPy release.\n",
      "  \"\"\"\n"
     ]
    }
   ],
   "source": [
    "#看看将数据集按年龄或者婚姻分类后预测结果\n",
    "# -*- coding: UTF-8 -*-\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import numpy.core.umath_tests\n",
    "import matplotlib as mpl\n",
    "import matplotlib.pyplot as plt\n",
    "%matplotlib inline\n",
    "import seaborn as sns\n",
    "sns.set(style=\"whitegrid\")\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.model_selection import GridSearchCV\n",
    "from sklearn.model_selection import learning_curve\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.preprocessing import StandardScaler\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.metrics import mean_squared_error"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "851751.5494822611\n"
     ]
    }
   ],
   "source": [
    "df1 = pd.read_csv('data/cleaned.csv')\n",
    "df1.sort_values(\"User_ID\",inplace=True)\n",
    "df1=df1.reset_index(drop=True)\n",
    "df1.drop(['Product_ID','Product_Category_1','Product_Category_2','Product_Category_3'], axis=1,inplace=True)\n",
    "tmp_df1 = df1[['User_ID','Purchase']]\n",
    "tmp_df2 = df1[['User_ID','Gender','Age','Occupation','City_Category','Stay_In_Current_City_Years','Marital_Status']]\n",
    "\n",
    "tmp_df = tmp_df1.groupby('User_ID').sum()\n",
    "tmp_df2=tmp_df2.drop_duplicates(['User_ID'])\n",
    "tmp_df2.sort_values(\"User_ID\",inplace=True)\n",
    "tmp_df2=tmp_df2.reset_index(drop=True)\n",
    "\n",
    "tmp_df=tmp_df.reset_index(drop=True)\n",
    "df = pd.concat([tmp_df2,tmp_df['Purchase']],axis=1)\n",
    "print(df['Purchase'].mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "User_ID           int64\n",
      "Gender            int32\n",
      "Marital_Status    int64\n",
      "Purchase          int64\n",
      "0-17              uint8\n",
      "18-25             uint8\n",
      "26-35             uint8\n",
      "36-45             uint8\n",
      "46-50             uint8\n",
      "51-55             uint8\n",
      "55+               uint8\n",
      "A                 uint8\n",
      "B                 uint8\n",
      "C                 uint8\n",
      "0                 uint8\n",
      "1                 uint8\n",
      "2                 uint8\n",
      "3                 uint8\n",
      "4+                uint8\n",
      "0                 uint8\n",
      "1                 uint8\n",
      "2                 uint8\n",
      "3                 uint8\n",
      "4                 uint8\n",
      "5                 uint8\n",
      "6                 uint8\n",
      "7                 uint8\n",
      "8                 uint8\n",
      "9                 uint8\n",
      "10                uint8\n",
      "11                uint8\n",
      "12                uint8\n",
      "13                uint8\n",
      "14                uint8\n",
      "15                uint8\n",
      "16                uint8\n",
      "17                uint8\n",
      "18                uint8\n",
      "19                uint8\n",
      "20                uint8\n",
      "dtype: object\n",
      "   User_ID  Gender  Marital_Status  Purchase  0-17  18-25  26-35  36-45  \\\n",
      "0        0       0               0    333481     1      0      0      0   \n",
      "1        1       1               0    810353     0      0      0      0   \n",
      "2        2       1               0    341635     0      0      1      0   \n",
      "3        3       1               1    205987     0      0      0      0   \n",
      "4        4       1               1    821001     0      0      1      0   \n",
      "\n",
      "   46-50  51-55 ...  11  12  13  14  15  16  17  18  19  20  \n",
      "0      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "1      0      0 ...   0   0   0   0   0   1   0   0   0   0  \n",
      "2      0      0 ...   0   0   0   0   1   0   0   0   0   0  \n",
      "3      1      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "4      0      0 ...   0   0   0   0   0   0   0   0   0   1  \n",
      "\n",
      "[5 rows x 40 columns]\n"
     ]
    }
   ],
   "source": [
    "#特征工程 解决编码问题\n",
    "#User_ID和Product_ID使用序号，Gender和Marray使用二元类型，其他使用热编码\n",
    "le_U_ID = LabelEncoder()\n",
    "df['User_ID'] = le_U_ID.fit_transform(df['User_ID'])\n",
    "le_P_ID = LabelEncoder()\n",
    "df['Gender'] = np.where(df['Gender']=='M',1,0) # Female: 0, Male: 1\n",
    "df_Age = pd.get_dummies(df.Age)\n",
    "df_CC = pd.get_dummies(df.City_Category)\n",
    "df_SIC = pd.get_dummies(df.Stay_In_Current_City_Years)\n",
    "df_encoded = pd.concat([df,df_Age,df_CC,df_SIC],axis=1)\n",
    "df_encoded.drop(['Age','City_Category','Stay_In_Current_City_Years'],axis=1,inplace=True)\n",
    "df_ocup = pd.get_dummies(df.Occupation)\n",
    "df_encoded = pd.concat([df_encoded,df_ocup],axis=1)\n",
    "df_encoded.drop(['Occupation'],axis=1,inplace=True)\n",
    "print(df_encoded.dtypes)\n",
    "print(df_encoded.head(5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "      User_ID  Gender  Marital_Status  Purchase  0-17  18-25  26-35  36-45  \\\n",
      "0           0       0               0    333481     1      0      0      0   \n",
      "1           1       1               0    810353     0      0      0      0   \n",
      "2           2       1               0    341635     0      0      1      0   \n",
      "5           5       0               0    379450     0      0      0      0   \n",
      "8           8       1               0    593960     0      0      1      0   \n",
      "10         10       0               0    556902     0      0      1      0   \n",
      "11         11       1               0    120801     0      0      1      0   \n",
      "13         13       1               0    127629     0      0      0      1   \n",
      "14         14       1               0   1047124     0      0      1      0   \n",
      "16         16       1               0   1425501     0      0      0      0   \n",
      "17         17       0               0   1978675     0      1      0      0   \n",
      "18         18       1               0   1457938     1      0      0      0   \n",
      "19         19       1               0    185747     0      0      1      0   \n",
      "20         20       1               0    126744     0      1      0      0   \n",
      "21         21       1               0   1279678     0      1      0      0   \n",
      "24         24       1               0    534215     0      1      0      0   \n",
      "28         28       1               0    696201     0      0      0      1   \n",
      "29         29       0               0    260985     0      0      0      1   \n",
      "30         30       1               0    496154     0      0      0      0   \n",
      "33         33       0               0    807747     0      1      0      0   \n",
      "35         35       1               0   2316532     0      0      1      0   \n",
      "36         36       0               0    248370     0      0      1      0   \n",
      "37         37       1               0    381977     0      1      0      0   \n",
      "38         38       0               0    146237     0      1      0      0   \n",
      "40         40       1               0    137548     0      0      1      0   \n",
      "43         43       1               0    138367     0      1      0      0   \n",
      "48         48       0               0    200772     1      0      0      0   \n",
      "49         49       1               0    523039     0      1      0      0   \n",
      "50         50       1               0   4270371     0      0      1      0   \n",
      "52         52       0               0    134073     0      0      0      1   \n",
      "...       ...     ...             ...       ...   ...    ...    ...    ...   \n",
      "5832     5832       1               0    819458     0      0      0      1   \n",
      "5833     5833       1               0    209102     0      0      1      0   \n",
      "5834     5834       1               0    206876     0      0      1      0   \n",
      "5839     5839       0               0    441528     1      0      0      0   \n",
      "5842     5842       0               0    271932     0      1      0      0   \n",
      "5847     5847       0               0    328841     0      0      1      0   \n",
      "5848     5848       1               0    658759     0      1      0      0   \n",
      "5853     5853       0               0   1472117     0      0      0      0   \n",
      "5854     5854       0               0    308872     0      0      1      0   \n",
      "5856     5856       0               0    465550     1      0      0      0   \n",
      "5857     5857       1               0   1595982     0      0      0      1   \n",
      "5858     5858       1               0    259682     0      1      0      0   \n",
      "5859     5859       0               0    254736     0      0      1      0   \n",
      "5860     5860       1               0   1710954     0      0      0      1   \n",
      "5861     5861       1               0   1142042     0      0      0      1   \n",
      "5863     5863       0               0    609155     0      0      1      0   \n",
      "5865     5865       0               0    255812     0      0      1      0   \n",
      "5867     5867       0               0    126398     0      0      0      1   \n",
      "5868     5868       1               0    911951     0      0      0      1   \n",
      "5869     5869       1               0    503361     0      0      1      0   \n",
      "5872     5872       1               0    242077     0      0      1      0   \n",
      "5875     5875       0               0    985215     0      0      1      0   \n",
      "5878     5878       1               0    335351     0      1      0      0   \n",
      "5881     5881       0               0    266795     0      1      0      0   \n",
      "5882     5882       1               0    480614     0      0      0      0   \n",
      "5884     5884       1               0    187070     0      0      1      0   \n",
      "5885     5885       0               0    926272     0      0      1      0   \n",
      "5887     5887       0               0   1075037     0      0      0      0   \n",
      "5888     5888       0               0     80859     0      0      0      0   \n",
      "5890     5890       1               0   1562615     0      0      1      0   \n",
      "\n",
      "      46-50  51-55 ...  11  12  13  14  15  16  17  18  19  20  \n",
      "0         0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "1         0      0 ...   0   0   0   0   0   1   0   0   0   0  \n",
      "2         0      0 ...   0   0   0   0   1   0   0   0   0   0  \n",
      "5         0      1 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "8         0      0 ...   0   0   0   0   0   0   1   0   0   0  \n",
      "10        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "11        0      0 ...   0   1   0   0   0   0   0   0   0   0  \n",
      "13        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "14        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "16        0      1 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "17        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "18        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "19        0      0 ...   0   0   0   1   0   0   0   0   0   0  \n",
      "20        0      0 ...   0   0   0   0   0   1   0   0   0   0  \n",
      "21        0      0 ...   0   0   0   0   1   0   0   0   0   0  \n",
      "24        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "28        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "29        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "30        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "33        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "35        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "36        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "37        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "38        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "40        0      0 ...   0   1   0   0   0   0   0   0   0   0  \n",
      "43        0      0 ...   0   0   0   0   0   0   0   0   1   0  \n",
      "48        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "49        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "50        0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "52        0      0 ...   0   1   0   0   0   0   0   0   0   0  \n",
      "...     ...    ... ...  ..  ..  ..  ..  ..  ..  ..  ..  ..  ..  \n",
      "5832      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5833      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5834      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5839      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5842      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5847      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5848      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5853      1      0 ...   0   0   0   0   0   0   1   0   0   0  \n",
      "5854      0      0 ...   0   0   0   0   1   0   0   0   0   0  \n",
      "5856      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5857      0      0 ...   0   0   0   0   0   0   1   0   0   0  \n",
      "5858      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5859      0      0 ...   0   1   0   0   0   0   0   0   0   0  \n",
      "5860      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5861      0      0 ...   0   0   0   0   1   0   0   0   0   0  \n",
      "5863      0      0 ...   0   0   0   0   0   0   0   0   0   1  \n",
      "5865      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5867      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5868      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5869      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5872      0      0 ...   0   0   0   0   0   0   1   0   0   0  \n",
      "5875      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5878      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5881      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5882      1      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5884      0      0 ...   0   0   0   1   0   0   0   0   0   0  \n",
      "5885      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5887      1      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5888      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "5890      0      0 ...   0   0   0   0   0   0   0   0   0   0  \n",
      "\n",
      "[3417 rows x 40 columns]\n"
     ]
    }
   ],
   "source": [
    "df_Female=df_encoded[df_encoded['Gender']==1]\n",
    "df_Male=df_encoded[df_encoded['Gender']==0]\n",
    "df_Marry=df_encoded[df_encoded['Marital_Status']==1]\n",
    "df_Unmarry=df_encoded[df_encoded['Marital_Status']==0]\n",
    "print(df_Unmarry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df_frac_Male = df_Male.sample(frac=0.02,random_state=100)\n",
    "X_Male = df_frac_Male.drop(['Purchase','User_ID'], axis=1)\n",
    "y_Male = df_frac_Male['Purchase']\n",
    "X_train_Male,X_test_Male,y_train_Male,y_test_Male = train_test_split(X_Male,y_Male,random_state=100)\n",
    "\n",
    "df_frac_Female = df_Female.sample(frac=0.02,random_state=100)\n",
    "X_Female = df_frac_Female.drop(['Purchase','User_ID'], axis=1)\n",
    "y_Female = df_frac_Female['Purchase']\n",
    "X_train_Female,X_test_Female,y_train_Female,y_test_Female = train_test_split(X_Female,y_Female,random_state=100)\n",
    "\n",
    "df_frac_Marry = df_Marry.sample(frac=0.02,random_state=100)\n",
    "X_Marry = df_frac_Marry.drop(['Purchase','User_ID'], axis=1)\n",
    "y_Marry = df_frac_Marry['Purchase']\n",
    "X_train_Marry,X_test_Marry,y_train_Marry,y_test_Marry = train_test_split(X_Marry,y_Marry,random_state=100)\n",
    "\n",
    "df_frac_Unmarry = df_Unmarry.sample(frac=0.02,random_state=100)\n",
    "X_Unmarry = df_frac_Unmarry.drop(['Purchase','User_ID'], axis=1)\n",
    "y_Unmarry = df_frac_Unmarry['Purchase']\n",
    "X_train_Unmarry,X_test_Unmarry,y_train_Unmarry,y_test_Unmarry = train_test_split(X_Unmarry,y_Unmarry,random_state=100)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "param_grid = {'n_estimators':[1,3,10,30,100,150,300,400,500],'max_depth':[1,3,5,7,9,11]}\n",
    "grid_rf_Male = GridSearchCV(RandomForestRegressor(),param_grid,cv=10,scoring='neg_mean_squared_error').fit(X_train_Male,y_train_Male)\n",
    "grid_rf_Female = GridSearchCV(RandomForestRegressor(),param_grid,cv=10,scoring='neg_mean_squared_error').fit(X_train_Female,y_train_Female)\n",
    "grid_rf_Marry = GridSearchCV(RandomForestRegressor(),param_grid,cv=10,scoring='neg_mean_squared_error').fit(X_train_Marry,y_train_Marry)\n",
    "grid_rf_Unmarry = GridSearchCV(RandomForestRegressor(),param_grid,cv=10,scoring='neg_mean_squared_error').fit(X_train_Unmarry,y_train_Unmarry)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Male Best parameter: {'max_depth': 1, 'n_estimators': 10}\n",
      "Male Best score: 564057.40\n",
      "Female Best parameter: {'max_depth': 1, 'n_estimators': 3}\n",
      "Female Best score: 649073.09\n",
      "Marry Best parameter: {'max_depth': 1, 'n_estimators': 3}\n",
      "Marry Best score: 964901.52\n",
      "Unmarry Best parameter: {'max_depth': 1, 'n_estimators': 30}\n",
      "Unmarry Best score: 970840.15\n"
     ]
    }
   ],
   "source": [
    "print('Male Best parameter: {}'.format(grid_rf_Male.best_params_))\n",
    "print('Male Best score: {:.2f}'.format((-1*grid_rf_Male.best_score_)**0.5))\n",
    "\n",
    "print('Female Best parameter: {}'.format(grid_rf_Female.best_params_))\n",
    "print('Female Best score: {:.2f}'.format((-1*grid_rf_Female.best_score_)**0.5))\n",
    "\n",
    "print('Marry Best parameter: {}'.format(grid_rf_Marry.best_params_))\n",
    "print('Marry Best score: {:.2f}'.format((-1*grid_rf_Marry.best_score_)**0.5))\n",
    "\n",
    "print('Unmarry Best parameter: {}'.format(grid_rf_Unmarry.best_params_))\n",
    "print('Unmarry Best score: {:.2f}'.format((-1*grid_rf_Unmarry.best_score_)**0.5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "train_sizes_Male, train_scores_Male, valid_scores_Male = learning_curve(RandomForestRegressor(max_depth=1, n_estimators=10), X_train_Male, y_train_Male, cv=10, scoring='neg_mean_squared_error')\n",
    "\n",
    "train_sizes_Female, train_scores_Female, valid_scores_Female = learning_curve(RandomForestRegressor(max_depth=1, n_estimators=3), X_train_Female, y_train_Female, cv=10, scoring='neg_mean_squared_error')\n",
    "\n",
    "train_sizes_Marry, train_scores_Marry, valid_scores_Marry = learning_curve(RandomForestRegressor(max_depth=1, n_estimators=3), X_train_Marry, y_train_Marry, cv=10, scoring='neg_mean_squared_error')\n",
    "\n",
    "train_sizes_Unmarry, train_scores_Unmarry, valid_scores_Unmarry = learning_curve(RandomForestRegressor(max_depth=1, n_estimators=30), X_train_Unmarry, y_train_Unmarry, cv=10, scoring='neg_mean_squared_error')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "<matplotlib.legend.Legend at 0x245be093588>"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZYAAAEPCAYAAABhkeIdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHBpJREFUeJzt3XuUnXV97/F3ZnIxhHJR7uZEWz18wQYIHRDD/WYVkESRyAKK1lY4LMBLu+w6IpF6S3pWqUW5KZZKlJDCgXI80gNGIQpIpZFRLoPwlSKXBmgoBhHGZJLM5PzxPBM3m2FuefbeM5n3ay0Wez+/3/M839/Myv7Mc9m/Z9KmTZuQJKkqba0uQJK0dTFYJEmVMlgkSZUyWCRJlTJYJEmVMlgkSZUyWCRJlTJYJEmVMlgkSZUyWCRJlTJYJEmVmtzqApqls7NzGnAg8CzQ2+JyJGm8aAd2B37S0dHRM5wVJkywUITKXa0uQpLGqcOAHw2n40QKlmcB9txzT6ZOndrqWoatq6uL2bNnt7qMpnLME4NjHh/Wr1/PL37xCyg/Q4djIgVLL8DUqVOZNm1aq2sZkfFWbxUc88TgmMeVYV9C8OK9JKlSBoskqVIT6VSYJA1LX18fq1atoru7u9LtTp48mYcffrjSbVZlxowZzJw5k7a2LT/eMFgkqc7zzz/PpEmTiIhKPmj7dXd3M2PGjMq2V5W+vj6efvppnn/+eXbZZZct3p6nwiSpzq9//Wt23XXXSkNlLGtra2PXXXflxRdfrGZ7lWxFkrYivb29TJkypdVlNNWUKVPYuHFjJdsyWCRpAJMmTWp1CU1V5XgNFknaSnzqU5/ipptuYvXq1Zx55pkD9omIhtdhsEjSVmbXXXflH/7hH1q2f4NFksaw8847j+XLl29+f9JJJ7Fy5UpOPfVU3ve+93HMMcdw2223vWKdVatWcfTRR29+feqppzJ//nwuvPDCptTs7caSNIgV9z7F91c+Vcm2ent7aW9v3/z+nW+fxdEHzBp0nfnz53PzzTfzrne9iyeeeIKenh6WLl3KF7/4Rd7ylrfw4x//mMWLF3PssccOuP4XvvAFTjrpJBYsWMC3v/1trr/++krGMhiPWCRpDDviiCP42c9+xssvv8y//Mu/MG/ePC666CIeffRRLr/8cq6++upBv8i5cuVKjjvuOADmzZvXlLvdPGKRpEEcfcDQRxXDNZovSE6dOpWjjjqKFStW8N3vfpcrr7yS0047jYMOOoiDDjqIuXPn8slPfnLQbWzatAko7vxqxndzPGKRpDFu/vz5XH311eywww7MmDGDJ554go9//OMcfvjh3H777fT2vvbEwwcffDDf+c53APje975HT8+wntW1RQwWSRrjOjo6eOmll5g3bx477LADJ598MieccALHHXcc3d3drFu3jt/+9rcDrnvhhReyfPly5s2bxx133NGUKWUm9R8ibe06OzvfDDw+e/bscfU8hM7OTjo6OlpdRlM55olhLI/54YcfZu+99658u2N1rrB+A427p6eHrq4ugN/v6Oh4Yjjb8YhFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklSplgRLRJwWET+PiEcj4twB2udExL0R8YuIuCoiJte17x8Rjf/6qCS12EsvvcS5577qY/I1Pfjgg1xwwQUNrGhoTQ+WiHgjsAg4FJgDnBURb6vrthQ4LzP3BCYBZ9asvw1wKTC1ORVLUuu8+OKLPPzww8Puv88++7Bo0aIGVjS0VkxCeSywIjPXAETEjcDJwOfL928CpmfmPWX/JcDngK+W778EfBk4pIk1S5qgXnrgh7x0/4pKttXb28uLNdPm/95+R/N7+x456Dpf/OIXee655zj33HN57LHH2HHHHXnd617HpZdeyqc//WlWr17Nc889x9y5c1m0aBErV67ksssu45prruGMM85gn332obOzkzVr1rBw4UKOOOKISsYymFacCtsDeLbm/bPAzOG0R8Q8YJvMvLHRRUrSWLBw4UJ22WUXzj//fB5//HEuuugirr76an74wx+y9957c/3117N8+XJ+8pOf8NBDD71q/Q0bNnD99ddz/vnn85WvfKUpNbfiiKUNqJ2gbBLQN1R7ROwGLKQ44hm1cs6bcaWzs7PVJTSdY54YxuqYJ0+evPkZJ21vOZDt33Jgw/Y12LNUANauXUtfXx9r167l9a9/PTvuuCPd3d0cddRRdHV18fWvf53HH3+cF154gTVr1rBp0yZ6e3vp7u6mt7eXAw88kO7ubmbOnMkLL7ww6P7Wr19fye+kFcGyCjis5v1uwDN17bsP0P4e4A3AnREBQETcBxyWmS8Nd+dOQjn2OeaJYSyP+eGHH27IZJGjmYRy+vTptLW1MX36dKZPn755/WuuuYbly5fzgQ98gCOPPJLHH39882dbe3s7M2bMoL29ne23354ZM2awzTbbMGnSpEH3P3XqVPbbb79XLKuZhHLYWhEstwGfjYidgW7g/cBZ/Y2Z+WRErIuIQzLzbuAM4NbMvAq4qr9fRGzKzDlNrl2Smmry5Mls3LjxVcvvvvtuTjnlFE488UQefPBBHnnkEfr6+pryIK+hNL2CzHwauAD4AXAfsCwzV0bELRFxQNntdODiiHgE2Ba4pNl1StJY8IY3vIE99tiD888//xXLP/ShD3HZZZdx4oknsnjxYvbff39WrVrVoipfqSWPJs7MZcCyumXH17y+H3j7ENuY1JjqJGnsmDJlCtddd92rls+dO5fly5cPuM5BBx0EFKfL+s2cOZMVK6q5u20orT9mkiRtVQwWSVKlDBZJGsBEeWx7vyrHa7BIUp329nY2bNjQ6jKaasOGDUyeXM1ld4NFkurssMMOrF69mr6+vqE7bwX6+vpYvXo122+/fSXba8ldYZI0lu20006sWrWKzKx0u+vXr2fq1LE5f+6MGTPYaaedKtmWwSJJddra2pg1a1bl2+3s7HzVN9u3Rp4KkyRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFVqcit2GhGnAQuBKcCXM/PyuvY5wFXAdsCdwNmZuTEiDgEuBqYCvwL+LDOfbGrxkqRBNf2IJSLeCCwCDgXmAGdFxNvqui0FzsvMPYFJwJnl8muBj2TmnPL1Jc2pWpI0XK04FXYssCIz12RmN3AjcHJ/Y0S8CZiemfeUi5YACyJiGrAwMx8olz8AzGpe2ZKk4WjFqbA9gGdr3j8LvH2I9pmZ2UNxJENEtAGfBb7d0EolSSPWimBpAzbVvJ8E9A23PSKmAt+kqH3xSHfe1dU10lVarrOzs9UlNJ1jnhgc89apFcGyCjis5v1uwDN17bsP1B4R2wLfobhwPz8zN4x057Nnz2batGkjXa1lOjs76ejoaHUZTeWYJwbHPD709PSM+A/yVlxjuQ04JiJ2johtgPcD3+1vLO/yWlfeAQZwBnBr+Xop8O/AKeWpMUnSGNP0YMnMp4ELgB8A9wHLMnNlRNwSEQeU3U4HLo6IR4BtgUsiYn9gPnAI8NOIuC8ibml2/ZKkwbXkeyyZuQxYVrfs+JrX9/PKC/oAP6O43iJJGsP85r0kqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSBoskqVKjDpaI2KnKQiRJW4dBgyUivlfz+vy65u8hSVKdoY5Ydq55vaCubVLFtUiStgJDBcummtf1QbIJSZLqDBUstWFikEiShjSSIxZJkoY0eYj2vSLigfL1W2teTwL+oHFlSZLGq6GC5bimVCFJ2moMGiyZeUf9soh4PfBCZnqaTJL0KkN9j2W7iFgaEUeU7/8J+C/g3yPirc0oUJI0vgx18f7vgJeAhyLieOAY4M3AeWWbJEmvMFSwzAXOycznKa633JSZ/5GZtwJ7Nrw6SdK4M9TF+40111IO5pVHKaP+5n1EnAYsBKYAX87My+va5wBXAdsBdwJnZ+bGiJgFLAV2ARI4PTNfHm0dkqTqDXXE0hsR20fEG4F9gR8AlO/Xj2aH5bqLgEOBOcBZEfG2um5LgfMyc0+KADuzXH4FcEVm7gXcC3xmNDVIkhpnqGC5DPgpcBdwfWb+Z0ScSDEB5VdHuc9jgRWZuSYzu4EbgZP7GyPiTcD0zLynXLQEWBARU4DDy/6bl4+yBklSgwx1u/GSiHgI2A24tVy8E/C3mfnNUe5zD+DZmvfPAm8fon1mud/fZObGuuWSpDFkqGssZOZP6t5fvYX7bOPVk1v2DaO9fjl16w1LV1fXSFdpuc7OzlaX0HSOeWJwzFunQYOlZgqXAWXmvqPY5yrgsJr3uwHP1LXvPkD7c8D2EdGemb1ln9r1hmX27NlMmzZtxEW3SmdnJx0dHa0uo6kc88TgmMeHnp6eEf9BPtQ1lm0pnsnyXeATwEfr/huN24BjImLniNgGeH+5fQAy80lgXUQcUi46A7g1MzdQXOs5pVz+QX53ek6SNEYMGiyZ+QfAB4DXU9yRdQLwX5l5x0DTvQxHZj4NXEBxh9l9wLLMXBkRt0TEAWW304GLI+IRinC7pFx+DsVdZD+nOOpZOJoaJEmNM5xrLHcBd0XEdOB9FB/42wHfysxR3RmWmcuAZXXLjq95fT+vvKDfv/xJ4MjR7FOS1BxDnQrbLDPXAv8buJziOyyLG1WUJGn8GvKIBSAi3kFxTeMkoJPiOyzfbmBdkqRxaqi7wv4a+BPgZeBbwH6ZuboZhUmSxqehjlj+GniK4hbgo4CjImJzY2bOa1xpkqTxaKhg+XBTqpAkbTWGmtLlNadtiYh3Vl+OJGm8G+oayx8BlwJrgA9n5vPl1PVfpng+y/TGlyhJGk+Gut34q8BNwC+BhRExH3gAmAHs1+DaJEnj0FDXWLbPzC9FRDvwC4pv4Z+dmdc1vjRJ0ng01BHLbwHKSR9fB5xgqEiSBjNUsNQ+fvj5zPxZI4uRJI1/Q50Ka4uIHSkDpvY1QGauaWBtkqRxaKhg2Qd4nt+Fya9q2jYB7Y0oSpI0fg31PZZhT1IpSRKMYHZjSZKGw2CRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVymCRJFXKYJEkVcpgkSRVaqgnSFYuImYBS4FdgAROz8yX6/pMBf4ROABYC5yWmY9ExLbAN4C9KJ5quSgzr2tm/ZKkwbXiiOUK4IrM3Au4F/jMAH0+BnRn5t7AJ4Al5fJPAU9l5r7AMcDfR8SujS9ZkjRcTQ2WiJgCHA7cWC5aAiwYoOsJwLUAmXknsHN5pHMHcEm5/DlgDbBbY6uWJI1Es0+F7QT8JjM3lu+fBWYO0G+Pso3afpn5/f4FEfEBYBrwUINqlSSNQsOCJSIWABfXLX4U2FS3rG+A1dvq+k2q7Vdu+yvAu2tCali6urpG0n1M6OzsbHUJTeeYJwbHvHVqWLBk5g3ADbXLylNhv4qI9szsBXYHnhlg9VVl22Pl+936+0XER4G/Av44Mx8caV2zZ89m2rRpI12tZTo7O+no6Gh1GU3lmCcGxzw+9PT0jPgP8qZeY8nMDcBdwCnlog8Ctw7Q9ZayjYg4FFiXmU9FxHuBvwAOGU2oSJIar+m3GwPnAN+MiIXAU8CpABFxNrBHZl4IXApcGREPAT3AGeW6nwOmAzdHRP/2PpKZ9zaxfknSIJoeLJn5JHDkAMu/VvN6HfChAfrs19DiJElbzG/eS5IqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqZbBIkiplsEiSKmWwSJIqNbnZO4yIWcBSYBcggdMz8+W6PlOBfwQOANYCp2XmIzXtk4G7gCszc0mTSpckDUMrjliuAK7IzL2Ae4HPDNDnY0B3Zu4NfAJYUtd+IbBnI4uUJI1OU4MlIqYAhwM3louWAAsG6HoCcC1AZt4J7Fwe6RARBwP7ATc3ul5J0sg1+4hlJ+A3mbmxfP8sMHOAfnuUbdT2i4jtgIuBsxpapSRp1Bp2jSUiFlCEQK1HgU11y/oGWL2trt+kst/lwOLMXB0Ro6qrq6trVOu1UmdnZ6tLaDrHPDE45q1Tw4IlM28AbqhdVp4K+1VEtGdmL7A78MwAq68q2x4r3+9GcdRyDLBPRHwOmAUcHREbMvPa4dY1e/Zspk2bNuLxtEpnZycdHR2tLqOpHPPE4JjHh56enhH/Qd7Uu8Iyc0NE3AWcAiwDPgjcOkDXW8q2H0XEocC6zHyS4hQZABGxBPjhSEJFktR4rbgr7BzgrIj4OXAYsBAgIs6OiM+XfS4FpkXEQ8AlwBktqFOSNApN/x5LeeRx5ADLv1bzeh3woSG286dV1yZJ2nJ+816SVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklQpg0WSVCmDRZJUqcmtLqCJ2gHWr1/f6jpGrKenp9UlNJ1jnhgc89hX85nZPtx1Jm3atKkx1YwxnZ2dhwJ3tboOSRqnDuvo6PjRcDpOpCOWnwCHAc8CvS2uRZLGi3Zgd4rP0GGZMEcskqTm8OK9JKlSBoskqVIGiySpUgaLJKlSBoskqVIGiySpUgaLJKlSE+kLkmNWRMwClgK7AAmcnpkv1/WZCvwjcACwFjgtMx+paZ9MMbPAlZm5pEmlj9qWjDkitgW+AewFTAIWZeZ1zax/JCLiNGAhMAX4cmZeXtc+B7gK2A64Ezg7MzcO52c0Vm3BmA8BLgamAr8C/iwzn2xq8aM02jHXtO8P3JOZ05pXdWN4xDI2XAFckZl7AfcCnxmgz8eA7szcG/gEsKSu/UJgz0YWWbEtGfOngKcyc1/gGODvI2LXxpc8chHxRmARcCgwBzgrIt5W120pcF5m7kkRlGeWy4fzMxpztnDM1wIfycw55etLmlP1ltnCMRMR2wCXUgTquGewtFhETAEOB24sFy0BFgzQ9QSKf2hk5p3AzuVftETEwcB+wM2NrrcKFYz5DsoPnMx8DlgD7NbYqkftWGBFZq7JzG6KMZ/c3xgRbwKmZ+Y95aIlwIIR/IzGotGOeRqwMDMfKJc/AMxqXtlbZFRjrln/S8CXm1RrwxksrbcT8JuaQ+JngZkD9NujbKO2X0RsR3Hq4KyGVlmtLRpzZn4/M58CiIgPANOAhxpY75YYcAzDaB/uz2gsGtWYM7MnM5cCREQb8Fng240ttTKj/T0TEfOAbTLzRrYSXmNpoohYQBECtR4F6ids6xtg9ba6fpPKfpcDizNzdURUVWplGjTm2m1/BXh37bnqMWbQMQzSXr8cBv4ZjUWjHTOw+draNyk+nxY3rsxKjWrMEbEbxXWZYxteYRMZLE2UmTcAN9QuK095/Coi2jOzl2IW0WcGWH1V2fZY+X43ir96jgH2iYjPUZw2ODoiNmTmtQ0axog0YMzPlNv4KPBXwB9n5oMNKr8Kqyhm1e63eQw17bsP0P4csP0wfkZj0WjHTHljxncoLtzPz8wNjS21MqMd83uANwB39v9hGBH3AYdl5kuNLLiRPBXWYuU/nLuAU8pFHwRuHaDrLWUbEXEosC4zn8zMPTJzTnmx8zvAhWMlVF7LFo75qYh4L/AXwCFjPFQAbgOOiYidywu07we+299Y3vG0rrwbCuAM4NYR/IzGolGNuXy9FPh34JTMHE9PxBrt7/mqzHxLzb9hytfjNlTAYBkrzqG4i+TnFH/1LASIiLMj4vNln0uBaRHxEMWF6zNaUml1tmTMnwOmAzdHxH3lfwc0t/zhycyngQuAHwD3Acsyc2VE3FJT8+nAxRHxCLAtv7sTasCf0Vg32jGXt9vOBw4Bflr+Xm9pwRBGbAt/z1sdn8ciSaqURyySpEoZLJKkShkskqRKGSySpEoZLJKkShksGpci4s0RsSki/rxu+ScjYkmF+3miWbcyR8R2EXF3RDwUESc1Y59D1PPZiLis1XVo/PGb9xrP+oAvRcSPMjNbXUwF5gC7ZuZbW12ItCUMFo1naylmhV0WEXMzc31tY3nk0pWZf1f/PiKeAJYBRwM7An9L8cW8DmADMC8z+6fkODci9qOY7PJLmfmNcnsnUnxpcSrwW+CTmfnjiPgsMJdi4sH7M/NP6up6L/DXFGcMXgL+EniR4hkzbyyn9JibmWtr1jmp3Fcf0Av8VWbeGRHvKGufRjFlyPcz888j4s3ACuD75ZgmUzxa4X9QPMfmXuBUimmA7qD4lvhBFHNYnZeZd9XV/EbgsrL/FOC6zFxcPgfo0vJntwH4JfDh8fLcGDWGp8I03i0CXmZ0kxW+LjPfQfGB+3XgK5m5H/AfwJ/W9FubmX8EvBP4m4j4w4j47+U+j8/M/Slml74pImaU67wJ2H+AUNkL+Brw/nJfFwL/l2Let48Aj5VTeqzllS4CzsnMAyiey3JkufzjFNP4HAS8DZgXER1l2+8D/69c58cUE3aeCvwhxTf531H2mwXcUU4p8ing+nI+t1rXAN/IzA7g7cCx5czSc8ta9ivbfgnsO8DPWhOIwaJxLTP7gD8BPhwR7xzh6v9c/v8x4D8z8/6a96+v6Xdlua9ngO9RTPz5ToojhNvLI4xrKY4m+k9j3fMaMy4fDdyemb8st7mCYsLJjgH61roO+D8RcRW/O8IC+BCwQ0R8muLBYNMppguB4gii/xk9jwH/mpm/ycx1FBMg9o/xhcxcVtZzK8UR0eZwKMPyCOAL5VjvoQijOcCDZf9/i4gvAP+cmf86xFi0lTNYNO5l5n9QnOL5JsVzTPptoji106/+6Xy1kxwONotub83rtrJvO0VAzKmZQPAdQFfZ77VOBbXz6unw2yhOL72mzLyA4umE91IcTd1ZNt0JHA88AnweeJrfjXl9Ztbu67XGWB+AbbxyzO3lNg+uG+vizPw1xUPmPlmuc31EnDPYWLT1M1i0VSgfknQrxSOM+/0XcABAROxB8Vf3aPxpuY1ZFM/NuL3874/LU1tExPEUTzycPsS2bgfeFRF/UK53NPDfgH97rRUiYnJ5TWibzPwaxeSU+0bxOOYDgf+ZmTdRPDjqrRRBMBI7R8S7y32dSBFAm2eNzszfUByl/GXZZwfgbmB+RLynHNO/ZuZngW+VNWkCM1i0NfkY8GTN+0uB3SMigaspLmaPxusi4qcU0/h/NDN/kZk/p7iucl1E3A98geKC/6AXrcv1zqG4HtMF/C/gxMx8cZB1NlIE5rKyjhuAP8vM1cDfUMwE3EVxfeRufnc6brjWAWeU47gAeG/5DJhapwHviIgHKULwn8rHM9xK8fTOroi4FziYYvZpTWDObixNYOXdY12Zue1QfaXh8ohFklQpj1gkSZXyiEWSVCmDRZJUKYNFklQpg0WSVCmDRZJUKYNFklSp/w8j+3eYzI1BdQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "train_scores_Male = (-1*train_scores_Male)**0.5\n",
    "valid_scores_Male = (-1*valid_scores_Male)**0.5\n",
    "train_scores_mean_Male = np.mean(train_scores_Male, axis=1)\n",
    "train_scores_std_Male = np.std(train_scores_Male, axis=1)\n",
    "valid_scores_mean_Male = np.mean(valid_scores_Male, axis=1)\n",
    "valid_scores_std_Male = np.std(valid_scores_Male, axis=1)\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(train_sizes_Male,valid_scores_mean_Male,label='valid')\n",
    "plt.plot(train_sizes_Male,train_scores_mean_Male,label='train')\n",
    "plt.fill_between(train_sizes_Male, train_scores_mean_Male - train_scores_std_Male, train_scores_mean_Male + train_scores_std_Male, alpha=0.3,color=\"g\")\n",
    "plt.fill_between(train_sizes_Male, valid_scores_mean_Male - valid_scores_std_Male,valid_scores_mean_Male + valid_scores_std_Male, alpha=0.3, color=\"b\")\n",
    "plt.xlabel('Number of samples')\n",
    "plt.ylabel('RMSE')\n",
    "#plt.title('Male')\n",
    "plt.legend()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
